# Adapt the working directory here: set `workdir` to the folder that holds
# abunzones.RData and the CPR taxa lists read below.
# Left empty, the script runs in the current working directory; calling
# setwd("") unconditionally would stop the script with
# "cannot change working directory".
workdir <- ""
if (nzchar(workdir)) {
  setwd(workdir)
}

# Restore the objects stored in abunzones.RData.
# NOTE(review): the rest of the script reads `abunzones`, presumably one of
# the objects in that file - confirm.
load("abunzones.RData")

# Taxa lists; the Publishable and FirstDisco columns are used in STEP 1.
eyetaxa <- read.csv("CPR_Data_ListEyecount.csv")
travtaxa <- read.csv("CPR_Data_ListTraverse.csv")
phytotaxa <- read.csv("CPR_Data_ListPhyto.csv")

#NOTE: in addition to sorting taxa, this script merges the eyecount and
#traverse taxa into a single zooplankton group

# STEP 1 remove unpublishable taxa and taxa added too recently:----------------------

# Latest accepted year of first record: taxa whose FirstDisco is later than
# this are dropped.
toorecent <- 1958

# Column positions to keep in each table (Publishable flag set and first
# recorded no later than `toorecent`). They do not depend on the zone, so they
# are computed once, before the loop.
# NOTE(review): the selection is positional - it assumes that row k of each
# taxa list describes column k of the matching *ByMonths table; confirm.
keep_eye <- which(eyetaxa$Publishable == 1 & eyetaxa$FirstDisco <= toorecent)
keep_trav <- which(travtaxa$Publishable == 1 & travtaxa$FirstDisco <= toorecent)
keep_phyto <- which(phytotaxa$Publishable == 1 & phytotaxa$FirstDisco <= toorecent)

abunselectaxa <- list()

for (i in names(abunzones)) {
  # for zooplankton
  # drop = FALSE keeps a 2-d table with its column names even when a single
  # taxon (or a single row) is left; otherwise the result collapses to a
  # vector and cbind() would name the columns "EyeMonths"/"TravMonths".
  EyeMonths <- abunzones[[i]]$EyeByMonths[, keep_eye, drop = FALSE]
  TravMonths <- abunzones[[i]]$TravByMonths[, keep_trav, drop = FALSE]

  # note that we fuse traverse and eyecount into one zooplankton group here
  abunselectaxa[[i]]$Zoo$Months <- cbind(EyeMonths, TravMonths)

  # same for phytoplankton
  abunselectaxa[[i]]$Phyto$Months <-
    abunzones[[i]]$PhytoByMonths[, keep_phyto, drop = FALSE]
}



#STEP 2: suppress taxa under some threshold of occurrence---------------------------

# Minimum share of samples in which a taxon must have a count > 0
# (classical CPR removal: taxa occurring in < 1 % of samples are dropped).
thresholdOccur <- 0.01

# Names of the columns of `counts` whose share of rows with a positive count
# exceeds `threshold`. NA entries are not counted as occurrences but their
# rows still count in the denominator.
frequent_taxa <- function(counts, threshold) {
  present_share <- colSums(counts > 0, na.rm = TRUE) / nrow(counts)
  names(which(present_share > threshold))
}

# Accumulators: a taxon enters as soon as it passes the threshold in at least
# one zone.
ZOOtokeep <- integer(0)
PHYTOtokeep <- integer(0)

for (i in names(abunselectaxa)) {
  # occurrence is measured on the Eye / Trav / Phyto tables of abunzones
  selecEYE <- frequent_taxa(abunzones[[i]]$Eye, thresholdOccur)
  selecTRAV <- frequent_taxa(abunzones[[i]]$Trav, thresholdOccur)
  selecPHYTO <- frequent_taxa(abunzones[[i]]$Phyto, thresholdOccur)

  # zooplankton = eyecount + traverse taxa
  ZOOtokeep <- union(ZOOtokeep, c(selecEYE, selecTRAV))

  # phytoplankton
  PHYTOtokeep <- union(PHYTOtokeep, selecPHYTO)
}

#This loop updates the abundances by keeping only the selected taxa
for (i in names(abunselectaxa)) {
  # ZOO
  zoo_months <- abunselectaxa[[i]]$Zoo$Months
  zoo_cols <- intersect(colnames(zoo_months), ZOOtokeep)
  # drop = FALSE: a zone left with a single taxon (or holding a single row)
  # must stay a 2-d table, because later steps call colnames() on it and
  # index it by column again.
  abunselectaxa[[i]]$Zoo$Months <- zoo_months[, zoo_cols, drop = FALSE]

  # PHYTO
  phyto_months <- abunselectaxa[[i]]$Phyto$Months
  phyto_cols <- intersect(colnames(phyto_months), PHYTOtokeep)
  abunselectaxa[[i]]$Phyto$Months <- phyto_months[, phyto_cols, drop = FALSE]
}

#compute the list of remaining taxa:
# Pool the column names of every zone, then reduce them to the distinct names
# (names(table(.)) returns each value once, in sorted order).
zoo_names_by_zone <- lapply(
  names(abunselectaxa),
  function(zone) colnames(abunselectaxa[[zone]]$Zoo$Months)
)
phyto_names_by_zone <- lapply(
  names(abunselectaxa),
  function(zone) colnames(abunselectaxa[[zone]]$Phyto$Months)
)

taxazoo <- names(table(as.character(unlist(zoo_names_by_zone, use.names = FALSE))))
taxaphyto <- names(table(as.character(unlist(phyto_names_by_zone, use.names = FALSE))))

#STEP 3: Select taxa for trait based analysis among the remaining-----------------------

# Group tables: a taxon (column taxon_name) is retained for the trait analysis
# when its Zooplankton / Phytoplankton flag equals 1. According to the
# original author, the flags reflect how complete the trait data are and the
# group delineation (e.g. diatoms and dinoflagellates within phytoplankton).
traitseleczoo <- read.csv("Zoo_Groups.csv")
traitselecphyto <- read.csv("Phyto_Groups.csv")

#ZOOPLANKTON
zoo_flagged <- which(traitseleczoo$Zooplankton == 1)
Zootokeep <- intersect(taxazoo, traitseleczoo[zoo_flagged, "taxon_name"])
Zootokeep
write.csv(Zootokeep, file = "Zookept.csv")

#PHYTOPLANKTON
phyto_flagged <- which(traitselecphyto$Phytoplankton == 1)
Phytotokeep <- intersect(taxaphyto, traitselecphyto[phyto_flagged, "taxon_name"])
Phytotokeep
write.csv(Phytotokeep, file = "Phytokept.csv")

#Zookept.csv and Phytokept.csv hold the taxa finally included in the analysis
#(see also Supplementary material 1 in published paper)

#apply the new species list

for (zone in names(abunselectaxa)) {
  # keep only the columns whose taxon is in the trait-based selection;
  # drop = FALSE preserves the 2-d shape whatever the number of columns left
  zoo_tab <- abunselectaxa[[zone]]$Zoo$Months
  zoo_cols <- intersect(colnames(zoo_tab), Zootokeep)
  abunselectaxa[[zone]]$Zoo$Months <- zoo_tab[, zoo_cols, drop = FALSE]

  phyto_tab <- abunselectaxa[[zone]]$Phyto$Months
  phyto_cols <- intersect(colnames(phyto_tab), Phytotokeep)
  abunselectaxa[[zone]]$Phyto$Months <- phyto_tab[, phyto_cols, drop = FALSE]
}

#REMOVE EMPTY COMMUNITIES-------------------
# Drop every row whose total over the remaining taxa is 0 (NA entries are
# ignored in the total, so an all-NA row is removed as well).
for (i in names(abunselectaxa)) {
  for (j in c("Zoo", "Phyto")) {
    for (k in c("Months")) {
      counts <- abunselectaxa[[i]][[j]][[k]]
      truc <- which(rowSums(counts, na.rm = TRUE) == 0)
      if (length(truc) > 0) {
        # drop = FALSE: without it a table left with one row, or holding a
        # single taxon, would be turned into a plain vector.
        abunselectaxa[[i]][[j]][[k]] <- counts[-truc, , drop = FALSE]
      }
    }
  }
}


#END AND EXPORT------------------------
# Write the filtered abundance list to disk under its own object name.
save(list = "abunselectaxa", file = "abunselectaxa.RData")


